# Data comes from nusmods, via the nusmods API at https://nusmods.com/api/.
# load bidding data
# calculate loading times
before <- Sys.time()
# read data directly from URL
myjson <- fromJSON(file = url("https://api.nusmods.com/corsBiddingStatsRaw.json"))
# Flag the records that belong to Semester 1 or 2. Wrapping %in% in isTRUE()
# gives FALSE (rather than an error inside if()) for a record that has no
# usable Semester value.
in_sem_1_or_2 <- vapply(
  myjson,
  function(rec) isTRUE(rec$Semester %in% c(1, 2)),
  logical(1)
)
# Bind every kept record in ONE rbind() call, starting from an empty data
# frame. Appending one row per iteration re-copies the whole data frame each
# time; a single call does not. vapply() + logical indexing also handles an
# empty download, where 1:length(myjson) would iterate over c(1, 0).
myBid <- do.call(rbind, c(list(data.frame()), unname(myjson[in_sem_1_or_2])))
# the raw list is no longer needed; release it
rm(myjson)
# calculate loading time
after <- Sys.time()
after - before
# save
saveRDS(myBid, file = "myBid.RDS")
# Output file: myBid.RDS
# create empty dataframe which will act as a container to be populated with data
myModInfo <- data.frame()
# Records kept from every year/semester. They are bound into myModInfo once,
# after the loops, instead of calling rbind() for every single record.
kept <- list()
# looping through each year
for (year in 2011:2018) {
  for (semester in c(1, 2)) {
    # create the url where data is to be extracted from
    myurl <- paste0("https://api.nusmods.com/", year, "-", year + 1, "/", semester, "/moduleTimetableDeltaRaw.json")
    myjson <- fromJSON(file = url(myurl))
    # Keep a record when its ModuleCode starts with "PL" and its Semester is
    # 1 or 2. isTRUE() turns a missing field into FALSE instead of an error.
    wanted <- vapply(
      myjson,
      function(rec) {
        isTRUE(str_detect(rec$ModuleCode, "^PL")) &&
          isTRUE(rec$Semester %in% c(1, 2))
      },
      logical(1)
    )
    kept <- c(kept, unname(myjson[wanted]))
    cat(year, "Semester", semester, "Done!")
  }
}
# populate the container with all kept records in a single rbind() call
myModInfo <- do.call(rbind, c(list(myModInfo), kept))
# save
saveRDS(myModInfo, file = "myModInfo.RDS")
# Output file: myModInfo.RDS
# Next: clean myModInfo.
# Rows to keep: Psychology modules (module code begins with "PL") whose
# lesson type is not a tutorial. which() discards rows where either test is
# NA, which is what subset() does as well.
is_psych <- str_detect(myModInfo$ModuleCode, "^PL")
not_tutorial <- myModInfo$LessonType != "TUTORIAL"
myModInfo <- myModInfo[which(is_psych & not_tutorial), , drop = FALSE]
# Columns to keep: those whose name contains one of the listed identifiers.
wanted_col <- grepl("ModuleCode|DayText|StartTime|Semester|AcadYear", names(myModInfo))
myModInfo <- myModInfo[, wanted_col]
# Collapse rows that repeat the same combination of ModuleCode, AcadYear,
# Semester, StartTime and DayText.
myModInfo <- distinct(
  myModInfo,
  ModuleCode, AcadYear, Semester, StartTime, DayText
)
# Next: clean myBid.
# remove non-psychology modules
# only keep rows where module code begins with PL
myBid <- subset(myBid, str_detect(myBid$ModuleCode, "^PL"))
# also remove Roots and Wings (PLS8001) and psychology for non-psych students (PLB1201)
myBid <- subset(myBid, !str_detect(myBid$ModuleCode, "PLS|PLB"))
# remove the rounds where it was reserved
myBid <- subset(myBid, !str_detect(myBid$StudentAcctType, "Reserved"))
# remove information from bidding rounds involving [G] accounts
# NOTE(review): "[G]" is a regex character class, so this drops every row
# whose StudentAcctType contains a capital "G" anywhere, not only the literal
# text "[G]". Left unchanged here; confirm that this is the intended filter.
myBid <- subset(myBid, !str_detect(myBid$StudentAcctType, "[G]"))
# remove unneeded columns
# A logical mask is used instead of -grep(): when no column name matches,
# -grep() yields an empty index and would drop EVERY column. drop = FALSE
# keeps the result a data frame even if a single column remains.
myBid <- myBid[, !grepl("Group|Faculty", names(myBid)), drop = FALSE]
# New variables:
# - Level that denotes whether the module is Level 1, 2, 3 or 4.
# - BpQ that represents Bids per Quota, which is the number of bidders for
#   each available quota of the module, derived from Bidders and Quota. Used
#   as a measure of the popularity of a module; higher BpQ signifies greater
#   popularity.
# - LessonTime that denotes whether the lecture begins in the morning (before
#   12pm), in the afternoon (12pm to 4pm), or in the evening (after 4pm).
# - myModInfo to myBid. (presumably a merge step; the verb was lost in extraction)
# create new column that indicates the level of the module, based on their module code
# One mask per level: TRUE where the module code contains that leading digit
# followed by three more digits.
is_level_1 <- str_detect(myBid$ModuleCode, "1[0-9][0-9][0-9]")
is_level_2 <- str_detect(myBid$ModuleCode, "2[0-9][0-9][0-9]")
is_level_3 <- str_detect(myBid$ModuleCode, "3[0-9][0-9][0-9]")
is_level_4 <- str_detect(myBid$ModuleCode, "4[0-9][0-9][0-9]")
# The masks are tested in order 1, 2, 3, 4; anything matching none of them is
# labelled as a graduate module.
myBid$Level <- ifelse(
  is_level_1, "Level 1",
  ifelse(
    is_level_2, "Level 2",
    ifelse(
      is_level_3, "Level 3",
      ifelse(is_level_4, "Level 4", "Graduate Module")
    )
  )
)
# crosstabs to doublecheck
# xtabs( ~ ModuleCode + Level,
# data = myBid, subset = NULL)
# create new column Bids Per Quota (BpQ)
bidders_num <- as.numeric(myBid$Bidders)
quota_num <- as.numeric(myBid$Quota)
myBid$BpQ <- bidders_num / quota_num
# transform these columns to numeric
# Columns are addressed by exact name with [[ ]] rather than located with
# grep(), which matches substrings and could pick the wrong column, or several
# columns at once. Names that are not present in mydata are skipped.
numeric_cols <- c("Quota", "Bidders", "LowestBid", "LowestSuccessfulBid", "HighestBid", "StartTime")
for (r in intersect(numeric_cols, names(mydata))) {
  mydata[[r]] <- as.numeric(mydata[[r]])
}
# transform these columns to factors
factor_cols <- c("AcadYear", "Semester", "ModuleCode", "Round", "Level", "StudentAcctType", "DayText", "LessonTime")
for (r in intersect(factor_cols, names(mydata))) {
  mydata[[r]] <- factor(mydata[[r]])
}
# create vector of the column names which are factors
facnames <- names(select_if(mydata, is.factor))
# factor names without ModuleCode and StudentAcctType
# (!grepl() is used instead of -grep(): with no match, -grep() returns an
# empty vector instead of leaving the names untouched)
facnames.mod <- facnames[!grepl("ModuleCode|StudentAcctType", facnames)]
# create vector of the column names which are numeric
numnames <- names(select_if(mydata, is.numeric))
# numeric names without StartTime (reuses numnames rather than recomputing it)
numnames.time <- numnames[!grepl("StartTime", numnames)]
# DayText Levels
# LessonTime Levels
# Bidders is calculated across all academic years, all bidding rounds, all modules…
## AcadYear Semester Round ModuleCode Level StartTime DayText LessonTime Quota Bidders LowestBid LowestSuccessfulBid HighestBid BpQ
## 2013/2014:357 1:950 1A:509 PL1101E: 85 Level 1: 85 Min. : 800 Monday :341 Morning : 518 Min. : 1.00 Min. : 0.00 Min. : 0.000 Min. : 0.0 Min. : 0.0 Min. : 0.00000
## 2015/2016:322 2:913 1B:303 PL3232 : 56 Level 2: 89 1st Qu.:1100 Tuesday :365 Afternoon:1130 1st Qu.: 3.00 1st Qu.: 1.00 1st Qu.: 0.625 1st Qu.: 0.5 1st Qu.: 1.0 1st Qu.: 0.02667
## 2014/2015:294 1C:166 PL3235 : 55 Level 3:925 Median :1300 Wednesday:477 Evening : 215 Median : 14.50 Median : 3.50 Median : 1.000 Median : 1.0 Median : 400.0 Median : 0.35714
## 2012/2013:237 2A:283 PL3234 : 54 Level 4:764 Mean :1301 Thursday :402 Mean : 23.49 Mean : 11.93 Mean : 75.912 Mean : 261.6 Mean : 742.3 Mean : 1.03981
## 2016/2017:227 2B:291 PL3236 : 54 3rd Qu.:1500 Friday :278 3rd Qu.: 30.50 3rd Qu.: 9.00 3rd Qu.: 10.000 3rd Qu.: 205.5 3rd Qu.:1215.2 3rd Qu.: 1.20000
## 2011/2012:166 3A:162 PL3233 : 53 Max. :1900 Max. :410.00 Max. :222.00 Max. :2430.000 Max. :3459.0 Max. :4801.0 Max. :15.00000
## (Other) :260 3B:149 (Other):1506
# Draw one count chart per categorical variable named in facnames.mod.
# note: I did not include ModuleCode in this exploratory graph because it has too many levels (83)
for (fac in facnames.mod) {
  cat(paste0("Histogram Of ", fac))
  # bars are both positioned and coloured by the variable being counted
  count_plot <- ggplot(data = mydata, aes_string(x = fac, fill = fac)) +
    geom_histogram(stat = "count") +
    ylab("Count") +
    ggtitle(paste0("Count of ", fac)) +
    theme_classic() +
    theme(
      axis.text.x = element_text(angle = 90, size = 6, vjust = -0.3),
      axis.title.x = element_blank()
    )
  plot(count_plot)
}
## Histogram Of AcadYear
## Histogram Of Semester
## Histogram Of Round
## Histogram Of Level
## Histogram Of DayText
## Histogram Of LessonTime
# Draw one 90-bin histogram per numeric variable named in numnames.
for (num in numnames) {
  cat(paste0("Histogram Of ", num))
  # the constant fill given to geom_histogram() takes precedence over the
  # fill mapping declared in aes_string()
  freq_plot <- ggplot(data = mydata, aes_string(x = num, fill = num)) +
    geom_histogram(bins = 90, fill = "violetred") +
    ylab("Histogram") +
    ggtitle(paste0("Frequency of ", num)) +
    theme_classic() +
    theme(
      axis.text.x = element_text(angle = 90, size = 6, vjust = -0.3),
      axis.title.x = element_text()
    )
  plot(freq_plot)
}
## Histogram Of StartTime
## Histogram Of Quota
## Histogram Of Bidders
## Histogram Of LowestBid
## Histogram Of LowestSuccessfulBid
## Histogram Of HighestBid
## Histogram Of BpQ
# Heat map of joint counts for every unordered pair of categorical variables.
# combn() yields each pair exactly once, in the same order a nested loop over
# i > r would visit them. It requires at least two names, hence the guard
# (which also covers an empty facnames.mod, where 1:length() would misfire).
if (length(facnames.mod) >= 2) {
  for (pair in combn(facnames.mod, 2, simplify = FALSE)) {
    row_var <- pair[1]
    col_var <- pair[2]
    cat(paste0(row_var, " ~ ", col_var))
    # create formula for xtabs; as.formula() converts the string directly,
    # without going through eval(parse())
    tempform <- paste0("~ ", row_var, " + ", col_var)
    pair_formula <- as.formula(tempform)
    # temp is a dataframe that is only going to exist in this section
    # and overwritten with each loop; it holds one row per combination of
    # levels, with the count in column Freq
    temp <- as.data.frame(xtabs(pair_formula,
                                data = mydata,
                                subset = NULL))
    plot(
      ggplot(data = temp, aes_string(x = row_var, y = col_var, fill = "Freq", label = "Freq")) +
        geom_tile() +
        geom_text() +
        scale_fill_gradient(low = "white", high = "violetred") +
        theme_minimal() +
        theme(axis.text.x = element_text(angle = 90, vjust = -0.3),
              legend.position = "none")
    )
  }
}
## AcadYear ~ Semester
## AcadYear ~ Round
## AcadYear ~ Level
## AcadYear ~ DayText
## AcadYear ~ LessonTime
## Semester ~ Round
## Semester ~ Level
## Semester ~ DayText
## Semester ~ LessonTime
## Round ~ Level
## Round ~ DayText
## Round ~ LessonTime
## Level ~ DayText
## Level ~ LessonTime
## DayText ~ LessonTime
# Scatter plot with a least-squares line for every unordered pair of numeric
# variables. combn() yields each pair once, in the order a nested loop over
# i > r would; the guard covers fewer than two names, where 1:length() and
# combn() would both misbehave.
if (length(numnames) >= 2) {
  for (pair in combn(numnames, 2, simplify = FALSE)) {
    x_name <- pair[1]
    y_name <- pair[2]
    cat(paste0(x_name, " ~ ", y_name))
    # create formula for lm(); as.formula() replaces eval(parse())
    tempform.std <- paste0("scale(", y_name, ")", " ~ ", "scale(", x_name, ")")
    tempform <- paste0(y_name, " ~ ", x_name)
    # regress to get best fit line
    # standardized: both variables are passed through scale()
    stdreg <- lm(as.formula(tempform.std), data = mydata)
    # unstandardized: supplies the intercept and slope of the dashed line
    reg <- lm(as.formula(tempform), data = mydata)
    plot(
      ggplot(data = mydata, aes_string(x = x_name, y = y_name)) +
        geom_point(color = "violetred", size = 2, alpha = 0.3) +
        theme_classic() +
        geom_abline(slope = reg$coefficients[2], intercept = reg$coefficients[1], lty = "dashed") +
        # annotate() draws the label once; geom_label() with aes() would
        # draw one identical label per row of mydata
        annotate("label", x = Inf, y = Inf,
                 label = paste0("Standardized Regression Coefficient = ",
                                round(stdreg$coefficients[2], 3)),
                 hjust = 1, vjust = 2) +
        theme(axis.text.x = element_text(angle = 90, vjust = -0.3))
    )
  }
}
## StartTime ~ Quota
## StartTime ~ Bidders
## StartTime ~ LowestBid
## StartTime ~ LowestSuccessfulBid
## StartTime ~ HighestBid
## StartTime ~ BpQ
## Quota ~ Bidders
## Quota ~ LowestBid
## Quota ~ LowestSuccessfulBid
## Quota ~ HighestBid
## Quota ~ BpQ
## Bidders ~ LowestBid
## Bidders ~ LowestSuccessfulBid
## Bidders ~ HighestBid
## Bidders ~ BpQ
## LowestBid ~ LowestSuccessfulBid
## LowestBid ~ HighestBid
## LowestBid ~ BpQ
## LowestSuccessfulBid ~ HighestBid
## LowestSuccessfulBid ~ BpQ
## HighestBid ~ BpQ
# Correlation plot of the columns named in numnames.time. The columns are
# selected by exact name: a regex built by pasting the names together matches
# substrings, and matches EVERY column when numnames.time is empty.
corrplot.mixed(cor(mydata[, numnames.time, drop = FALSE]),
               upper = "color",
               tl.pos = "lt",
               tl.cex = 0.5,
               cl.cex = 0.5)
# Box plot of every numeric variable against every categorical variable.
for (fac in facnames.mod) {
  for (num in numnames) {
    cat(paste0(fac, " ~ ", num))
    # graph: one box per level of the categorical variable
    box_plot <- ggplot(data = mydata, aes_string(x = fac, y = num, fill = fac)) +
      geom_boxplot() +
      theme_classic() +
      theme(legend.position = "none",
            axis.text.x = element_text(angle = 90, vjust = -0.3))
    plot(box_plot)
  }
}
## AcadYear ~ StartTime
## AcadYear ~ Quota
## AcadYear ~ Bidders
## AcadYear ~ LowestBid
## AcadYear ~ LowestSuccessfulBid
## AcadYear ~ HighestBid
## AcadYear ~ BpQ
## Semester ~ StartTime
## Semester ~ Quota
## Semester ~ Bidders
## Semester ~ LowestBid
## Semester ~ LowestSuccessfulBid
## Semester ~ HighestBid
## Semester ~ BpQ
## Round ~ StartTime
## Round ~ Quota
## Round ~ Bidders
## Round ~ LowestBid
## Round ~ LowestSuccessfulBid
## Round ~ HighestBid
## Round ~ BpQ
## Level ~ StartTime
## Level ~ Quota
## Level ~ Bidders
## Level ~ LowestBid
## Level ~ LowestSuccessfulBid
## Level ~ HighestBid
## Level ~ BpQ
## DayText ~ StartTime
## DayText ~ Quota
## DayText ~ Bidders
## DayText ~ LowestBid
## DayText ~ LowestSuccessfulBid
## DayText ~ HighestBid
## DayText ~ BpQ
## LessonTime ~ StartTime
## LessonTime ~ Quota
## LessonTime ~ Bidders
## LessonTime ~ LowestBid
## LessonTime ~ LowestSuccessfulBid
## LessonTime ~ HighestBid
## LessonTime ~ BpQ